# -*- coding: utf-8 -*-
# time: 2025/4/10 16:12
# file: hugging_dataLoad.py
# author: hanson
"""
The load_dataset function from the Hugging Face Datasets library.

load_dataset is one of the core functions of the Hugging Face datasets library;
it is used to load public datasets hosted on the Hugging Face Hub.
"""
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification, AutoModel

# --- Example 1: causal language model ---------------------------------------
# Checkpoint identifier passed to AutoModelForCausalLM below.
model_id = "facebook/opt-350m"
# Load the model for this checkpoint through the causal-LM auto class.
# NOTE(review): `model` is rebound twice further down without being used in
# between, so this object is discarded as soon as the next assignment runs.
# Presumably each section is meant as a stand-alone snippet -- confirm.
model = AutoModelForCausalLM.from_pretrained(model_id)

# --- Example 2: dataset ------------------------------------------------------
# Load the dataset: first argument "glue", second argument "mrpc".
dataset = load_dataset("glue", "mrpc")

# --- Example 3: sequence-classification model and tokenizer -----------------
# Load the pretrained model and its tokenizer from the same checkpoint name.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# num_labels=2 is forwarded to from_pretrained; this rebinds `model`.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)


### Another example follows. The original note described it as showing how to
### load a public dataset with load_dataset and run text classification with a
### pretrained model.
### NOTE(review): the statements below only load a model and a tokenizer; no
### dataset loading or classification happens in this section.


# Load the base model with AutoModel, reusing `model_name` from above.
# After these two lines, `model` and `tokenizer` hold the objects created here.
model = AutoModel.from_pretrained(model_name)
# NOTE(review): same call and argument as the earlier tokenizer load.
tokenizer = AutoTokenizer.from_pretrained(model_name)